// ---------------------------------------------------------------------
// Session setup, data load, id renaming, and construction of the
// calendar-year variable plus within-year observation counters.
// ---------------------------------------------------------------------
version 13 // run the script under Stata 13 semantics
cls // clears backscroll display buffer
clear all // "start from scratch"
set more off // suppresses -more- prompts

/* load data */
// NOTE(review): machine-specific absolute path; adjust before running elsewhere.
cd "C:\Users\benja\Dropbox\BenjaminWork\ResearchProject\data\creation\data-sets and merge\background\recode"
use "background.dta" 

/* rename variables */
rename nomem_encr id_p
rename nohouse_encr id_hh

/* variable year */
// wave has the format YYYYMM, e.g. 200711, so the calendar year is the
// integer part of wave/100 (200711 -> 2007).
// Only waves from 200700 up to (but excluding) 201600 receive a year, i.e.
// the years 2007-2015; anything outside that range (including a missing
// wave) keeps year == . exactly as the former 2007/2015 loop intended.
// The former loop called "regen ..., replace", which is not a built-in Stata
// command (and -generate- has no replace option), so it is replaced by a
// single -generate-.
gen int year = floor(wave / 100) if (wave >= 200700 & wave < 201600)

/* number of observation per individual per year */
// N = number of rows of this person in this year,
// n = running index of the row within the person-year (sorted by wave).
// Both are used later to keep only the last row of every person-year.
bys id_p year (wave): gen N = _N
bys id_p year (wave): gen n = _n

/* ===================== HOUSEHOLD ===================== */

// Explanation of the (recurring) recoding pattern used for every variable below:
// "bys id_p year (wave):" sorts and partitions the dataset so that the command
// that follows is executed separately for each individual and, within each
// individual, separately for each year
// (e.g. id = 1 & year = 2007; id = 1 & year = 2008; ...).
// Because rows are sorted ascending by wave within each group, x[_N] is the
// value of the latest wave of that year (usually December).
// If that value is missing, the forvalues loop keeps replacing it (only while
// it is still missing) with the value of row N-1, N-2, ..., going back at most
// 11 rows; the condition (_N-`i' > 0) prevents indexing before the first row.

/* SIM PC */
bys id_p year (wave): gen simpc_hh = simpc[_N] 
label var simpc_hh "Sim PC (1=yes)"
forvalues i = 1/11 { 
	bys id_p year (wave): replace simpc_hh = simpc[_N-`i'] if (simpc_hh == .) &  (_N-`i' > 0)
}

/* age of head of household */
// values up to 17 are set to missing in the raw variable before it is used
recode lftdhhh (min/17=.)
bys id_p year (wave): gen age_hhh = lftdhhh[_N]
label var age_hhh "Age in years of head of household"
forvalues i = 1/11 { 
	bys id_p year (wave): replace age_hhh = lftdhhh[_N-`i'] if (age_hhh == .) &  (_N-`i' > 0)
}

/* partner of hhh (yes/no) */
bys id_p year (wave): gen partner_hhh = partner[_N]
label var partner_hhh "Head of household lives together with a partner (1=yes)"
forvalues i = 1/11 { 
	bys id_p year (wave): replace partner_hhh = partner[_N-`i'] if (partner_hhh == .) &  (_N-`i' > 0)
}

/* living status / relationship status of hhh */
bys id_p year (wave): gen livstatus_hhh = woonvorm[_N]
label variable livstatus_hhh "Domestic situation of head of household"
label define l_livstatus 1 "Single - Children" 2 "(Un)married cohabitation - children" 3 "(Un)married cohabitation + children" 4 "Single + Children" 5 "Other"
label values livstatus_hhh l_livstatus 
forvalues i = 1/11 { 
	bys id_p year (wave): replace livstatus_hhh = woonvorm[_N-`i'] if (livstatus_hhh == .) &  (_N-`i' > 0)
}

/* number of household members */
// values 10-15 are top-coded to 9 in the raw variable
recode aantalhh (10/15=9)
bys id_p year (wave): gen hhsize = aantalhh[_N]
label var hhsize "N of household members"
forvalues i = 1/11 { 
	bys id_p year (wave): replace hhsize = aantalhh[_N-`i'] if (hhsize == .) &  (_N-`i' > 0)
}

/* number of children in household */
// values 10-15 are top-coded to 9 in the raw variable
recode aantalki (10/15=9)
bys id_p year (wave): gen hhchildren = aantalki[_N]
label var hhchildren "N of children in household"
forvalues i = 1/11 { 
	bys id_p year (wave): replace hhchildren = aantalki[_N-`i'] if (hhchildren == .) &  (_N-`i' > 0)
}

/* type of dwelling */
// code 9 is turned into system missing so that the fallback fill can replace it
mvdecode woning, mv(9)
bys id_p year (wave): gen hhtype = woning[_N]
label variable hhtype "Type of dwelling"
label define l_hhtype 1 "self-owned" 2 "rental" 4 "cost-free" 
label values hhtype l_hhtype
forvalues i = 1/11 { 
	bys id_p year (wave): replace hhtype = woning[_N-`i'] if (hhtype == .) &  (_N-`i' > 0)
}

// dummy //
// NOTE(review): -tab, gen()- numbers the dummies by the rank of the observed
// categories, so hhtype1/hhtype2 are "self-owned"/"rental" only if both
// categories occur in the data -- confirm with -tab hhtype-.
qui tab hhtype, gen(hhtype)
rename hhtype1 hhtype_sel
label var hhtype_sel "Type of dwelling: 1=self-owned"
rename hhtype2 hhtype_ren
label var hhtype_ren "Type of dwelling: 1=rental"


/* level of urbanization */
bys id_p year (wave): gen hhurban = sted[_N]
forvalues i = 1/11 { 
	bys id_p year (wave): replace hhurban = sted[_N-`i'] if (hhurban == .) &  (_N-`i' > 0)
}
// The scale has to be reversed. This must happen AFTER the fallback fill:
// the loop copies raw (unreversed) sted values, so reversing first would leave
// every filled-in value on the original scale while the rest is reversed.
replace hhurban = 6 - hhurban
label var hhurban "Level of urbanization (1=not urban, 5=extremly urban)"

/* household income */
bys id_p year (wave): gen income_hh = nettohh_f[_N]
label var income_hh "Net household income in Euros"
forvalues i = 1/11 { 
	bys id_p year (wave): replace income_hh = nettohh_f[_N-`i'] if (income_hh == .) &  (_N-`i' > 0)
}

/* wave of recruitment */
// clonevar (instead of gen) carries over the labels of the raw variable
clonevar recruit = werving
bys id_p year (wave): replace recruit = werving[_N]
forvalues i = 1/11 { 
	bys id_p year (wave): replace recruit = werving[_N-`i'] if (recruit == .) &  (_N-`i' > 0)
}

/* ===================== OBSERVATION ===================== */

// Every variable in this section follows the same pattern:
// within each person-year (rows sorted ascending by wave) take the value of the
// last row, x[_N]; if it is missing, fall back to row N-1, N-2, ... (at most
// 11 rows back), replacing only while the target is still missing.
// clonevar is used where the labels of the raw variable should be kept.

/* ID */
label var id_p "number of observation"

/* year of birth */
bys id_p year (wave): gen birthyear = gebjaar[_N]
label var birthyear "Year of birth of observation"
forvalues i = 1/11 { 
	bys id_p year (wave): replace birthyear = gebjaar[_N-`i'] if (birthyear == .) &  (_N-`i' > 0)
}

// age at first wave //
// NOTE(review): 2008 is hard-coded as the reference year -- confirm that this
// is the intended "first wave"; ages up to 15 are set to missing.
gen age = 2008 - birthyear
label var age "Age at first wave"
recode age (min/15 = .)
	
/* gender */
// The sex code is filled with the same last-wave / fallback logic as all other
// variables, and female is left missing when no sex code is available.
// (Previously a missing geslacht in the last wave was silently coded 0.)
tempvar sex
bys id_p year (wave): gen `sex' = geslacht[_N]
forvalues i = 1/11 { 
	bys id_p year (wave): replace `sex' = geslacht[_N-`i'] if (`sex' == .) &  (_N-`i' > 0)
}
gen female = (`sex' == 2) if !missing(`sex')
drop `sex'
label var female "Gender of observation (1=female)"

/* position within the household */
// code 9 is turned into system missing so that the fallback fill can replace it
mvdecode positie, mv(9)
clonevar position = positie 
bys id_p year (wave): replace position = positie[_N]
forvalues i = 1/11 { 
	bys id_p year (wave): replace position = positie[_N-`i'] if (position == .) &  (_N-`i' > 0)
}

// dummies //
// NOTE(review): -tab, gen()- numbers dummies by the rank of the observed
// categories; pos2 = partner and pos5 = child (and the later drop of
// pos1 pos3 pos4 pos6 pos7) assume that exactly seven categories occur.
qui tab position, gen(pos)
rename pos2 pos_part 
label var pos_part "Position within the HH: 1=Partner"
rename pos5 pos_child
label var pos_child "Position within the HH: 1=Child"

/* civil status */
clonevar civilstatus = burgstat
bys id_p year (wave): replace civilstatus = burgstat[_N]
forvalues i = 1/11 { 
	bys id_p year (wave): replace civilstatus = burgstat[_N-`i'] if (civilstatus == .) &  (_N-`i' > 0)
}

// dummies //
// NOTE(review): civil1/civil4/civil5 are positional as well and assume that
// all five civil-status categories occur in the data.
qui tab civilstatus, gen(civil)
rename civil1 civil_marr
label var civil_marr "Civil status: 1=Married"

// categories 2 and 3 are combined into a single divorced/separated dummy
recode civilstatus (1=0) (2/3=1 "separated/divorced") (4/5=0), gen(civil_div)
label var civil_div "Civil status: 1=Divorced/Separated"

rename civil4 civil_wid
label var civil_wid "Civil status: 1=Widowed"

rename civil5 civil_nev
label var civil_nev "Civil status: 1=Never married"

/* occupation */
clonevar occ = belbezig
bys id_p year (wave): replace occ = belbezig[_N]
forvalues i = 1/11 { 
	bys id_p year (wave): replace occ = belbezig[_N-`i'] if (occ == .) &  (_N-`i' > 0)
}
// simplified //
// the 14 original codes are condensed into 7 groups
recode occ (1/3 = 1 "employed") (4/5 = 2 "job seeker") (7 14 = 3 "child / student") (6 11 = 4 "social welfare") (8 12 = 5 "home maker") (9/10 = 6 "pensioner") (13 = 7 "other"), gen(occ_cat)
label var occ_cat "condensed occ"

// dummies //
// creates occ1-occ7 next to the existing variable occ; occ7 ("other") is
// not renamed here.
qui tab occ_cat, gen(occ)
rename occ1 occ_emp
label var occ_emp "Employment: 1=employed"
rename occ2 occ_seek 
label var occ_seek "Employment: 1=job seeker"
rename occ3 occ_stu
label var occ_stu "Employment: 1=student / child"
rename occ4 occ_soc
label var occ_soc "Employment: 1=social welfare"
rename occ5 occ_home
label var occ_home "Employment: 1=home maker"
rename occ6 occ_pen
label var occ_pen "Employment: 1=pensioner"


/* income */
// values of 3911101 and above are set to missing in the raw variable
recode brutoink_f (3911101/max=.)
clonevar income = brutoink_f
bys id_p year (wave): replace income = brutoink_f[_N]
forvalues i = 1/11 { 
	bys id_p year (wave): replace income = brutoink_f[_N-`i'] if (income == .) &  (_N-`i' > 0)
}

/* education */
clonevar edu = oplcat
bys id_p year (wave): replace edu = oplcat[_N]
forvalues i = 1/11 { 
	bys id_p year (wave): replace edu = oplcat[_N-`i'] if (edu == .) &  (_N-`i' > 0)
}

/* migrant status */
clonevar migrant = herkomstgroep
bys id_p year (wave): replace migrant = herkomstgroep[_N]
forvalues i = 1/11 { 
	bys id_p year (wave): replace migrant = herkomstgroep[_N-`i'] if (migrant == .) &  (_N-`i' > 0)
}

// dummy 
// code 0 -> 0, codes 101-202 -> 1, every other value -> missing
recode migrant (0 = 0 "Dutch") (101/202 = 1 "Migrant") (else = .), gen(migrantd)
label var migrantd "Migrant 1 vs. Dutch 0"

* ----------------------------------------------------------------------
* Checkpoint: store the fully recoded data so that the lengthy recoding
* above does not have to be repeated; everything below starts from it.
* ----------------------------------------------------------------------
save bg_cr, replace
use bg_cr.dta, clear

* Keep only the last observation of every person-year
* (n is the running index within the person-year, N the group size).
sort id_p year wave
drop if n != N

* Remove the raw source variables that have been recoded above.
drop geslacht positie gebjaar lftdhhh aantalhh aantalki partner burgstat ///
     woonvorm woning sted belbezig brutoink_f nettohh_f oplcat werving   ///
     herkomstgroep simpc

* Variant without categorical variables (only dummies left), kept for reference:
* drop hhtype hhtype3 position pos1 pos3 pos4 pos6 pos7 civilstatus civil2 civil3 occ occ_cat occ7 migrant

* Active variant: categorical variables and dummies are both kept; only the
* unused leftover dummies and intermediate variables are removed.
drop hhtype3 pos1 pos3 pos4 pos6 pos7 ///
     civil2 civil3 occ occ7 migrant

* Build the proper long file: the calendar year becomes the wave identifier,
* recoded to 7, 8, ... as in the other data sets.
drop n N wave
replace year = year - 2000
rename year wave
rename id_p id

* Every id-wave combination must now occur exactly once.
bysort id wave: assert _N == 1

* stata2mplus using backgroundLong, replace

* ============== STATA FILE FOR MERGING ==============
saveold d01_bv, replace
* ====================================================




